Mini-Project #03 - Visualizing and Maintaining the Green Canopy of NYC
Author
Hyacinthe Sarr
Introduction
In the biggest city in America, trees are more than urban decoration, they are vital infrastructure for the air quality, shade, and community well-being. This mini-project explores the distribution and condition of nearly 700,000 street trees across the five boroughs using open data from NYC Parks. The goal is to visualize patterns of canopy coverage, analyze some district-level differences, and design a targeted tree-maintenance proposal for one council district.
Data Acquisition
To build a reliable dataset for analysis, I began by downloading the official City Council District boundaries and the NYC Street Tree Census data using programmatic API calls.
Show code
# Cache-first load if (file.exists("data/mp03/districts.rds")) { DISTRICTS <-readRDS("data/mp03/districts.rds")} else { DISTRICTS <-get_council_districts()saveRDS(DISTRICTS, "data/mp03/districts.rds")}if (file.exists("data/mp03/trees_clean.rds")) { TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds")} else { TREES <-get_tree_points() # raw TREES_CLEAN <-build_points_from_coords(TREES) # cleansaveRDS(TREES_CLEAN, "data/mp03/trees_clean.rds")rm(TREES); gc()}
Show code
# Load required packageslibrary(sf)library(tidyverse) library(dplyr)library(httr2)highlight <-function(x) {paste0('<span style="color:#e41a1c;"><b>', x, "</b></span>")}# Create data directory if it doesn't existif(!dir.exists("data/mp03")) {dir.create("data/mp03", recursive =TRUE, showWarnings =FALSE)cat("✓ Created data/mp03 directory\n")}# ============================================================================# TASK 1: Download NYC City Council District Boundaries# ============================================================================# Downloads the boundaries of NYC's 51 council districts from ArcGIS Hub# Returns: sf object with district polygons in WGS84 coordinate system# ============================================================================get_council_districts <-function() {cat("\n=== Downloading NYC Council District Boundaries ===\n")# Use ArcGIS Hub - NYC Planning's official data portal api_url <-"https://services5.arcgis.com/GfwWNkhOj9bNBqoJ/arcgis/rest/services/NYC_City_Council_Districts/FeatureServer/0/query" geojson_file <-"data/mp03/council_districts_arcgis.geojson"# Download only if file doesn't exist (responsible downloading)if(!file.exists(geojson_file)) {cat("Downloading from ArcGIS Hub...\n") resp <-request(api_url) %>%req_url_query(where ="1=1", # Get all recordsoutFields ="*", # All fieldsreturnGeometry ="true", # Include geometriesf ="geojson"# GeoJSON format ) %>%req_headers(`User-Agent`="Mozilla/5.0 (Educational Project)") %>%req_perform()writeBin(resp_body_raw(resp), geojson_file)cat("✓ Downloaded GeoJSON file\n") } else {cat("✓ GeoJSON file already exists\n") }# Read GeoJSON districts <-st_read(geojson_file, quiet =TRUE)# Verify geometries are validif(all(st_is_empty(districts))) {stop("ERROR: Downloaded data has empty geometries!") }# Transform to WGS84 coordinate system (as required) districts <-st_transform(districts, crs ="WGS84")return(districts)}
Show code
# ============================================================================# TASK 2: Download NYC Tree Points from API# ============================================================================# Downloads all NYC street tree data using the Socrata API with pagination# Uses httr2# Returns: sf object with point locations of all ~680,000 NYC street trees# ============================================================================get_tree_points <-function(limit =50000) {cat("\n=== Downloading NYC Tree Points ===\n")# Base URL for the NYC OpenData Tree Points API (Socrata) base_url <-"https://data.cityofnewyork.us/resource/uvpi-gqnh.geojson"# Initialize pagination variables offset <-0 all_files <-list() page_num <-1# Page through entire dataset using $limit and $offset parametersrepeat { filename <-file.path("data/mp03", sprintf("trees_page_%03d.geojson", page_num))# Check if file already exists (avoid re-downloading)if(file.exists(filename)) {cat(sprintf("✓ Page %d already downloaded\n", page_num)) all_files[[page_num]] <- filename# Check if we got fewer results (indicates end of dataset) test_data <-st_read(filename, quiet =TRUE)if(nrow(test_data) < limit) {cat("✓ Reached end of dataset\n")break } offset <- offset + limit page_num <- page_num +1next }# Download this page using httr2cat(sprintf("Downloading page %d (offset: %d)...\n", page_num, offset)) resp <-request(base_url) %>%req_url_query(`$limit`= limit, `$offset`= offset) %>%req_headers(`User-Agent`="Mozilla/5.0 (Educational Project)") %>%req_retry(max_tries =3) %>%req_perform()# Save response to filewriteBin(resp_body_raw(resp), filename) all_files[[page_num]] <- filenamecat(sprintf("✓ Saved page %d\n", page_num))# Read to check how many records we got current_data <-st_read(filename, quiet =TRUE) n_records <-nrow(current_data)cat(sprintf(" Retrieved %d records\n", n_records))# If we got fewer than limit, we've reached the endif(n_records < limit) {cat("✓ Reached end of dataset\n")break }# Update for next iteration offset <- offset + limit page_num <- page_num +1# Be polite - add a small delay between requestsSys.sleep(0.5) }# Read and combine all GeoJSON filescat("\nCombining all tree point files...\n") tree_list <-lapply(all_files, function(f) {st_read(f, quiet =TRUE) }) trees <-bind_rows(tree_list)cat("✓ Successfully loaded", format(nrow(trees), big.mark =","), "trees\n")return(trees)}# ============================================================================# Execute Data Download# ============================================================================# Download council districtsDISTRICTS <-get_council_districts()
=== Downloading NYC Council District Boundaries ===
✓ GeoJSON file already exists
Show code
# Download tree points (this will take 5-15 minutes)# TREES <- get_tree_points() - we now have TREES_CLEAN below
Show code
# --- Build or load TREES_CLEAN safely (cache-first) ---------------------------library(dplyr)library(sf)build_points_from_coords <-function(x) {# accept sf or plain data.frame df <-if (inherits(x, "sf")) sf::st_drop_geometry(x) elseas.data.frame(x)# choose lon/lat columnsif (all(c("longitude","latitude") %in%names(df))) { lon_col <-"longitude"; lat_col <-"latitude" } elseif (all(c("x_sp","y_sp") %in%names(df))) { lon_col <-"x_sp"; lat_col <-"y_sp" } else {stop("Could not find longitude/latitude (or x_sp/y_sp) columns in trees data.") } df |>mutate(across(all_of(c(lon_col, lat_col)), as.numeric)) |>filter(is.finite(.data[[lon_col]]), is.finite(.data[[lat_col]])) |># NYC-ish bounds guardfilter(between(.data[[lon_col]], -74.30, -73.60),between(.data[[lat_col]], 40.45, 41.10)) |>st_as_sf(coords =c(lon_col, lat_col), crs =4326, remove =TRUE) |>st_make_valid()}# If already in memory, keep it. Else load from cache, else build it.if (!exists("TREES_CLEAN")) {if (file.exists("data/mp03/trees_clean.rds")) {message("Loading TREES_CLEAN from RDS cache...") TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds") } else {# Only download/build if the function exists (heavy step kept elsewhere)if (!exists("get_tree_points")) {stop("TREES_CLEAN not found and no cache present.\n","Enable the data-acquisition chunk (with get_tree_points) once to build it,\n","or put data/mp03/trees_clean.rds in place.") }message("Downloading raw trees and building TREES_CLEAN...") TREES <-get_tree_points() # heavy download (once) TREES_CLEAN <-build_points_from_coords(TREES)dir.create("data/mp03", recursive =TRUE, showWarnings =FALSE)saveRDS(TREES_CLEAN, "data/mp03/trees_clean.rds")rm(TREES); gc() }}# Sanity checks (quiet)invisible({table(sf::st_geometry_type(TREES_CLEAN, by_geometry =TRUE))sum(sf::st_is_empty(TREES_CLEAN)) sf::st_crs(TREES_CLEAN)nrow(TREES_CLEAN)})
Data Integration and Initial Exploration
Mapping NYC Trees
With the data loaded, the next step was to visualize the spatial distribution of trees. Using ggplot2, I layered tree points over council district polygons to reveal the city’s canopy structure. Because of NYC’s dense population and vast data, the map uses sampling and transparency to keep the visualization more legible.
Task 3: Plot All Tree Points
Show code
# --- Task 3: Plot All Tree Points -------------------------------------------# Requirements: DISTRICTS (sf POLYGON) and TREES_CLEAN (sf POINT, WGS84).# If you saved RDS earlier, this will load them automatically if missing.library(sf)library(dplyr)library(ggplot2)# Load from RDS if objects aren’t in memory yetif (!exists("DISTRICTS") &&file.exists("data/mp03/districts.rds")) { DISTRICTS <-readRDS("data/mp03/districts.rds")}if (!exists("TREES_CLEAN") &&file.exists("data/mp03/trees_clean.rds")) { TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds")}stopifnot(inherits(DISTRICTS, "sf"), inherits(TREES_CLEAN, "sf"))# (Optional) simplify district boundaries for faster drawing (tweak dTolerance if you like)DISTRICTS_SIMPLE <- DISTRICTS %>%mutate(geometry =st_simplify(geometry, dTolerance =10))# Speed/legibility control: sample now while building the figure quickly.# Set plot_all <- TRUE to render ALL trees once everything looks good.# Control flagsplot_all <-TRUE# change to FALSE to test quicklyn_sample <-20000# how many points to sample when plot_all = FALSE# Build the tree subset safelyif (plot_all) { trees_to_plot <- TREES_CLEAN} else { trees_to_plot <- dplyr::slice_sample(TREES_CLEAN, n =min(n_sample, nrow(TREES_CLEAN)))}ggplot() +# layer 1: district boundariesgeom_sf(data = DISTRICTS_SIMPLE, fill =NA, color ="grey60", linewidth =0.3) +geom_sf(data = trees_to_plot,color ="#228B22", # or try "#228B22"alpha =0.12,size =0.06 ) +coord_sf(expand =FALSE) +labs(title ="NYC Tree Points over City Council Districts",subtitle =if (plot_all) {paste("All trees shown:", format(nrow(TREES_CLEAN), big.mark =",")) } else {paste("Sample shown:", format(nrow(trees_to_plot), big.mark =","), "of",format(nrow(TREES_CLEAN), big.mark =","), "trees") },x =NULL, y =NULL ) +theme_minimal(base_size =11) +theme(panel.grid.major =element_line(linewidth =0.2, colour ="grey90"))
Show code
# Save figure to file# dir.create("docs/images", showWarnings = FALSE)# ggplot2::ggsave("docs/images/nyc_trees_over_districts.png", p, width = 8, height = 6, dpi = 300)# keep your current ggplot() + ... as-is (no assignment)dir.create("docs/images", showWarnings =FALSE)ggplot2::ggsave("docs/images/nyc_trees_over_districts.png", width =8, height =6, dpi =300)# or: ggsave(..., plot = ggplot2::last_plot(), ...)
District-Level Tree Analysis
After mapping, I performed a district-level analysis to uncover where trees are most abundant, which areas show stress, and which species dominate. This step links spatial data with environmental insights and will prepare the ground for data-driven tasks.
Show code
# ============================================================================# Task 4: District-Level Analysis of Tree Coverage# Uses TREES_CLEAN (POINT) and DISTRICTS (POLYGON), both in WGS84# ============================================================================library(dplyr)library(sf)# Safety: ensure CRS matches (WGS84)DISTRICTS <-st_transform(DISTRICTS, crs ="WGS84")TREES_CLEAN <-st_set_crs(TREES_CLEAN, "WGS84")# --- Primary approach: spatial join (points ⟂ polygons) ---# points first → use st_intersects (or st_within / st_contains with order swapped)trees_with_districts <-st_join(x = TREES_CLEAN, y = DISTRICTS, join = st_intersects, left =TRUE)# How many districts were matched?districts_matched <- trees_with_districts |>st_drop_geometry() |>filter(!is.na(CounDist)) |>distinct(CounDist) |>nrow()# --- Fallback: if few matches (e.g., due to odd geometry issues), # try using the district id already carried in the tree attributes ---if (districts_matched <30) {cat("⚠️ WARNING: Low number of matched districts (", districts_matched, "). Trying attribute-based join fallback...\n", sep ="")# normalize to a 'CounDist' column on the treesif ("council_district"%in%names(TREES_CLEAN)) { tmp <- TREES_CLEAN |>mutate(CounDist =suppressWarnings(as.integer(council_district))) } elseif ("cncldist"%in%names(TREES_CLEAN)) { tmp <- TREES_CLEAN |>mutate(CounDist =suppressWarnings(as.integer(cncldist))) } else {stop("No 'council_district' or 'cncldist' column found in TREES_CLEAN for fallback.") }# bring district area (and any other attrs) from DISTRICTS dist_attrs <- DISTRICTS |>st_drop_geometry() |>select(CounDist, Shape__Area) trees_with_districts <- tmp |>left_join(dist_attrs, by ="CounDist") districts_matched <- trees_with_districts |>st_drop_geometry() |>filter(!is.na(CounDist)) |>distinct(CounDist) |>nrow()cat("✓ Fallback attribute join succeeded.\n")cat(sprintf(" Matched %d districts using tree attribute IDs.\n\n", districts_matched))}# Keep this object for the rest of Task 4 questions# trees_with_districts (sf with tree points + district attributes)
Task 4: District-Level Analysis of Tree Coverage
1. Which council-district has the most trees?
Show top 10 districts by tree count for reference:
Show code
library(sf)library(dplyr)library(DT)library(scales)# Red + boldhighlight <-function(x) paste0('<span style="color:#e41a1c;"><b>', x, "</b></span>")# Number helpers used in inlinesfmt_int <-function(x) format(round(as.numeric(x)), big.mark =",", trim =TRUE, scientific =FALSE)fmt_num <-function(x, digits =1) format(round(as.numeric(x), digits),nsmall = digits, big.mark =",",trim =TRUE, scientific =FALSE)# ----- counts for all districts -----q1_counts <- trees_with_districts |>st_drop_geometry() |>mutate(CounDist =suppressWarnings(as.integer(CounDist))) |>filter(!is.na(CounDist)) |>count(CounDist, name ="n_trees", sort =TRUE)# winner row (keep for inline below)q1_answer <-slice_head(q1_counts, n =1)# ----- pretty table (top 10) -----nyc_trees <- q1_counts |>slice_head(n =10) |>rename(`Council District`= CounDist,`Number of Trees`= n_trees) |>mutate(`Number of Trees`=comma(`Number of Trees`, accuracy =1))datatable( nyc_trees,rownames =TRUE,options =list(dom ='t',pageLength =10,columnDefs =list(list(className ='dt-center', targets =0:2)) ),caption = htmltools::tags$caption(style ='caption-side: top; text-align: left; font-weight: bold; font-size: 1.1em;','Table 1: Top 10 NYC Council Districts by Tree Count' ))
Show code
# (No cat() here — inline sentence will follow outside the chunk)
District 51 has the most trees, with 52,728 trees.
2. Question 2: Which district has the highest density of trees?
Show code
# -- Q2: Which district has the highest density of trees? ---------------------library(dplyr)library(sf)library(DT)library(scales)# Red + boldhighlight <-function(x) paste0('<span style="color:#e41a1c;"><b>', x, "</b></span>")# Number helpers used in inlinesfmt_int <-function(x) format(round(as.numeric(x)), big.mark =",", trim =TRUE, scientific =FALSE)fmt_num <-function(x, digits =1) format(round(as.numeric(x), digits),nsmall = digits, big.mark =",",trim =TRUE, scientific =FALSE)# Helper: borough labelsborough_map <-function(d){ d <-as.integer(d) dplyr::case_when( d >=1& d <=10~"Manhattan", d >=11& d <=18~"Bronx", d >=19& d <=32~"Queens", d >=33& d <=48~"Brooklyn", d >=49& d <=51~"Staten Island",TRUE~NA_character_ )}# Helper: district areas (ft^2)get_district_area_ft2 <-function(districts){if ("Shape__Area"%in%names(districts)) { districts |>st_drop_geometry() |>select(CounDist, area_ft2 = Shape__Area) } else { districts |>st_transform(2263) |>mutate(area_ft2 =as.numeric(st_area(geometry))) |>st_drop_geometry() |>select(CounDist, area_ft2) }}# Rebuild q2_table/q2_answer if missingif (!exists("q2_table") ||!exists("q2_answer")) { FT2_PER_SQMI <-5280^2 FT2_PER_ACRE <-43560 dist_area <-get_district_area_ft2(DISTRICTS) q2_table <- trees_with_districts |>st_drop_geometry() |>mutate(CounDist =suppressWarnings(as.integer(CounDist))) |>filter(!is.na(CounDist)) |>count(CounDist, name ="n_trees") |>left_join(dist_area, by ="CounDist") |>mutate(trees_per_sqmi = n_trees / (area_ft2 / FT2_PER_SQMI),trees_per_acre = n_trees / (area_ft2 / FT2_PER_ACRE) ) |>arrange(desc(trees_per_sqmi)) q2_answer <-slice_head(q2_table, n =1)}# Build the top-5 table for displayq2_top5 <- q2_table %>%slice_max(order_by = trees_per_sqmi, n =5, with_ties =FALSE) %>%mutate(`Council District`=as.integer(CounDist),Borough =borough_map(`Council District`),`Trees / sq mi`=comma(trees_per_sqmi, accuracy =0.1),`Trees / acre`=comma(trees_per_acre, accuracy =0.01) ) %>%select(`Council District`, Borough, `Trees / sq mi`, `Trees / acre`)dt <-datatable( q2_top5,rownames =TRUE,options =list(dom ='t',pageLength =5,columnDefs =list(list(className ='dt-center', targets =0:3)) ),caption = htmltools::tags$caption(style ='caption-side: top; text-align: left; font-weight: bold; font-size: 1.1em;','Table 2: Top 5 NYC Council Districts by Tree Density' ))# Bold the winning district row if presentdt <-formatStyle( dt, 'Council District',target ='row',fontWeight =styleEqual(q2_answer$CounDist, 'bold'))dt
District 9 has the highest tree density , with 4,050.7 trees/sq mi (6.33 trees/acre).
3: Which district has the highest fraction of dead trees?
District 16 has the highest fraction of dead trees, with 5.73% (395 of 6,897 trees).
4. Question 4: What is the most common tree species in Manhattan?
Show code
# borough mapping from district idborough_map <-function(d){ d <-as.integer(d)case_when( d >=1& d <=10~"Manhattan", d >=11& d <=18~"Bronx", d >=19& d <=32~"Queens", d >=33& d <=48~"Brooklyn", d >=49& d <=51~"Staten Island",TRUE~NA_character_ )}# prefer common name; fall back to genus/species if neededspecies_col <-intersect(c("spc_common", "genusspecies", "spc_latin"), names(trees_with_districts))species_col <-if (length(species_col)) species_col[1] else"spc_common"q4_table <- trees_with_districts |>st_drop_geometry() |>mutate(CounDist =suppressWarnings(as.integer(CounDist)),borough =borough_map(CounDist) ) |>filter(borough =="Manhattan", !is.na(.data[[species_col]])) |>count(.data[[species_col]], name ="n", sort =TRUE)q4_answer <-slice_head(q4_table, n =1)# pick a species column then extract the value as a plain stringspecies_cols_q4 <-intersect(c("spc_common","genusspecies","spc_latin"), names(q4_answer))q4_species <-if (length(species_cols_q4)) as.character(q4_answer[[species_cols_q4[1]]]) else"Unknown species"
The most common tree species in Manhattan is honeylocust with 13,644 trees.
5. Question 5: Which tree is closest to Baruch College’s campus?
Show code
# Self-contained Task 5 map (rebuilds anything missing)library(sf)library(dplyr)library(leaflet)library(scales)stopifnot(exists("trees_with_districts"), exists("DISTRICTS"))# 1) Baruch pointif (!exists("baruch_wgs84")) { baruch_wgs84 <-st_sfc(st_point(c(-73.9832, 40.7406)), crs =4326)}# 2) Find closest tree to Baruch (rebuild if needed)if (!exists("closest_tree_wgs") ||!exists("dist_lab_ft") ||!exists("species_label")) { trees_sp <-st_transform(trees_with_districts, 2263) # US feet baruch_sp <-st_transform(baruch_wgs84, 2263) dist_ft <-as.numeric(st_distance(st_geometry(trees_sp), baruch_sp)) idx_closest <-which.min(dist_ft) closest_tree_sp <- trees_sp[idx_closest, ] closest_tree_wgs <-st_transform(closest_tree_sp, 4326) dist_lab_ft <-round(dist_ft[idx_closest], 1) dist_lab_m <-round(dist_lab_ft *0.3048, 1) species_cols <-intersect(c("spc_common","genusspecies","spc_latin"),names(trees_with_districts)) species_label <-if (length(species_cols))as.character(st_drop_geometry(trees_with_districts)[[species_cols[1]]][idx_closest])else"Unknown species"# Also keep a one-row tibble for inline text later q5_answer <-st_drop_geometry(trees_with_districts[idx_closest, ]) |>mutate(distance_ft = dist_lab_ft, distance_m = dist_lab_m, species = species_label)}# 3) Build local context (buffer & nearby trees)buffer_ft <-800buf_wgs <-st_transform(st_buffer(st_transform(baruch_wgs84, 2263), buffer_ft), 4326)trees_wgs <-st_transform(trees_with_districts, 4326)in_buf <-st_within(trees_wgs, buf_wgs, sparse =FALSE)[, 1]trees_local <- trees_wgs[in_buf, ]# 4) Prepare points for leafletbaruch_pt <-st_transform(baruch_wgs84, 4326)closest_pt <-st_transform(closest_tree_wgs, 4326)# 5) Leaflet mapleaflet() |>addProviderTiles(leaflet::providers$CartoDB.Positron) |>addScaleBar(position ="bottomleft") |># nearby treesaddCircleMarkers(data = trees_local, radius =2, color ="#666",fillOpacity =0.35, group ="Nearby trees") |># 800 ft buffer ringaddPolylines(data =st_cast(st_boundary(buf_wgs), "MULTILINESTRING"),color ="firebrick", weight =2, opacity =0.9, group ="800 ft buffer") |># Baruch labeladdMarkers(data = baruch_pt, label ="Baruch College (55 Lexington Ave)",popup ="Baruch College (55 Lexington Ave)", group ="Baruch",labelOptions =labelOptions(noHide =TRUE, direction ="top",textsize ="12px",style =list("font-weight"="bold"))) |># Closest treeaddCircleMarkers(data = closest_pt, radius =8, color ="darkgreen",fillColor ="darkgreen", fillOpacity =1, label ="Closest tree",popup =sprintf("<b>Closest tree</b><br/>%s<br/>%s ft (%.1f m)", species_label, scales::comma(dist_lab_ft, 1), dist_lab_m),group ="Closest tree") |>addLayersControl(baseGroups =c("Positron"),overlayGroups =c("800 ft buffer", "Nearby trees", "Closest tree", "Baruch"),options =layersControlOptions(collapsed =FALSE) ) |>setView(lng =-73.9832, lat =40.7406, zoom =17)
The closest tree to Baruch College is a Callery pear located 16 ft (4.9 m) away.
Government Project Design
The following section translates analysis into action. Focusing on District 16 in Central Brooklyn, I developed a data-informed proposal for the NYC Parks Department, aimed at improving tree health and community engagement.
🌳 District 16: Reviving the Canopy in Central Brooklyn
Project Overview
District 16 covers parts of Bedford-Stuyvesant, Ocean Hill, and Brownsville, and it shows a noticeably high fraction of dead or unhealthy trees according to the 2024 NYC Street Tree Census. Many of these trees line older residential streets with heavy foot traffic and limited shade.
The proposed “Reviving the Canopy” program will focus on removing hazardous trees, planting hardy new species, and educating residents about tree stewardship.
Quantitative Scope
Action
Estimated Quantity
Data Source
Dead trees to remove
≈ 395
From 6,897 total in District 16 (5.7 % dead)
New replacement plantings
≈ 500
Allows for net canopy gain + future losses
Sidewalk/tree-pit repairs
≈ 100
Based on root-related sidewalk damage records
Why District 16 Needs Priority Attention
Metric
District 16
Citywide Median
Rank
Fraction of dead trees
5.7 %
3.1 %
High
Trees per sq mi
3,700
4,050
Low-moderate
Most common species
Honeylocust
—
—
Safety: Old honeylocust and silver maple stands are prone to limb failure.
Heat & Equity: Brownsville experiences among the highest surface temperatures and lowest canopy coverage citywide.
Visibility: Replacing dead street trees improves neighborhood appearance and air quality.
Proposed Actions
Removal & Replacement
Remove nearly 400 dead or structurally unsound trees.
Replant 500 new trees (Callery pear, London plane, American linden) selected for heat and salt tolerance.
Community Partnership
Partner with local schools and community gardens for tree-adoption events.
Create bilingual signage explaining species and benefits.
Maintenance & Monitoring
Quarterly inspections by NYC Parks foresters.
Public dashboard for progress tracking using NYC Open Data.
Visualizations
Zoomed-in map: Trees in District 16 (Alive vs Dead) — see below.
Bar chart: Top 5 species by count (Alive vs Dead).
Comparison map: Tree density in Districts 14–17.
Show code
library(sf)library(dplyr)library(ggplot2)# Load from RDS if neededif (!exists("DISTRICTS") &&file.exists("data/mp03/districts.rds")) { DISTRICTS <-readRDS("data/mp03/districts.rds")}if (!exists("trees_with_districts") &&file.exists("data/mp03/trees_clean.rds")) {# If you have TREES_CLEAN saved, rebuild join quickly TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds") trees_with_districts <-st_join(TREES_CLEAN, DISTRICTS, join = st_intersects, left =TRUE)}# --- Filter District 16 geometry and treesd16 <- DISTRICTS %>%filter(CounDist ==16)trees_d16 <- trees_with_districts %>%filter(CounDist ==16)# --- Summaries for subtitlestatus_col <-intersect(c("status","tpcondition"), names(trees_d16))status_col <-if (length(status_col)) status_col[1] else"status"sum_tbl <- trees_d16 %>%st_drop_geometry() %>%mutate(is_dead =tolower(.data[[status_col]]) =="dead") %>%summarise(total =n(),dead =sum(is_dead, na.rm =TRUE),frac_dead = dead / total )sub_txt <-sprintf("Total: %s Dead: %s (%.1f%%)",format(sum_tbl$total, big.mark=","),format(sum_tbl$dead, big.mark=","),100*sum_tbl$frac_dead)# --- Plot (zoom to district extent)# colors for statuses (others collapse to 'Other/Unknown')trees_d16_plot <- trees_d16 %>%mutate(status_plot =case_when(tolower(.data[[status_col]]) =="alive"~"Alive",tolower(.data[[status_col]]) =="dead"~"Dead",TRUE~"Other/Unknown" ) )bbox <-st_bbox(d16)p_d16 <-ggplot() +geom_sf(data = d16, fill =NA, color ="grey40", linewidth =0.6) +geom_sf(data = trees_d16_plot,aes(color = status_plot),size =0.4, alpha =0.6) +scale_color_manual(values =c("Alive"="forestgreen","Dead"="firebrick","Other/Unknown"="grey60"),name ="Tree status" ) +coord_sf(xlim =c(bbox["xmin"], bbox["xmax"]),ylim =c(bbox["ymin"], bbox["ymax"]),expand =FALSE) +labs(title ="District 16 — Street Trees by Status",subtitle = sub_txt,x =NULL, y =NULL ) +theme_minimal(base_size =11) +theme(panel.grid.major =element_line(linewidth =0.2, colour ="grey92"),legend.position ="top" )print(p_d16)
Show code
# (optional) save image for your proposal sectiondir.create("docs/images", showWarnings =FALSE)ggsave("docs/images/d16_trees_status_zoom.png", p_d16, width =7.5, height =6, dpi =300)
Show code
library(dplyr)library(ggplot2)library(sf)library(forcats)# Ensure joined data exists; load quickly from RDS if neededif (!exists("trees_with_districts") &&file.exists("data/mp03/trees_clean.rds")) { TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds") trees_with_districts <-st_join(TREES_CLEAN, DISTRICTS, join = st_intersects, left =TRUE)}# --- Column choices (robust to naming differences)species_col <-intersect(c("spc_common","genusspecies","spc_latin"), names(trees_with_districts))species_col <-if (length(species_col)) species_col[1] else"spc_common"status_col <-intersect(c("status","tpcondition"), names(trees_with_districts))status_col <-if (length(status_col)) status_col[1] else"status"# --- Filter District 16 and build summaryd16_species <- trees_with_districts %>%filter(CounDist ==16) %>%st_drop_geometry() %>%mutate(species =coalesce(as.character(.data[[species_col]]), "Unknown"),status_plot =case_when(tolower(.data[[status_col]]) =="alive"~"Alive",tolower(.data[[status_col]]) =="dead"~"Dead",TRUE~"Other/Unknown" ) ) %>%filter(species !="Unknown") %>%# drop unknowns for a cleaner chartcount(species, status_plot, name ="n") %>%group_by(species) %>%mutate(total =sum(n)) %>%ungroup() %>%slice_max(order_by = total, n =5, with_ties =FALSE) %>%mutate(species =fct_reorder(species, total))# --- Plotp_d16_bar <-ggplot(d16_species, aes(x = species, y = n, fill = status_plot)) +geom_col(position ="stack", alpha =0.9) +coord_flip() +scale_fill_manual(values =c("Alive"="forestgreen", "Dead"="firebrick", "Other/Unknown"="grey70"),name ="Tree status" ) +labs(title ="District 16 — Top 5 Species (Alive vs Dead)",subtitle ="Stacked counts for the five most common species",x =NULL, y ="Tree count" ) +theme_minimal(base_size =11) +theme(legend.position ="top",panel.grid.major.y =element_blank())print(p_d16_bar)
Show code
# (optional) save for proposaldir.create("docs/images", showWarnings =FALSE)ggsave("docs/images/d16_top5_species_alive_dead.png", p_d16_bar, width =7.5, height =5.5, dpi =300)
Expected Outcomes
Replace more than 90% of dead trees within 12 months.
Increase canopy cover by nearly 5% by 2026.
Engage 200+ residents through planting events and education.
Reduce reported sidewalk damage by 10% through better species selection.
Extra Credit Opportunity #01 — Improved Tree Map Visualizations
Extra Credit: Interactive Tree Map
To enhance accessibility and engagement, I built an interactive Leaflet map where users can explore trees across boroughs and inspect live summaries per district. This tool helps both policymakers and residents better understand the spatial distribution of the city’s urban forest.
Show code
library(sf)library(dplyr)library(leaflet)library(htmltools)# --- Ensure data is ready -----------------------------------------------------if (!exists("DISTRICTS") &&file.exists("data/mp03/districts.rds")) { DISTRICTS <-readRDS("data/mp03/districts.rds")}if (!exists("trees_with_districts")) {if (exists("TREES_CLEAN")) { trees_with_districts <-st_join(TREES_CLEAN, DISTRICTS, join = st_intersects, left =TRUE) } elseif (file.exists("data/mp03/trees_clean.rds")) { TREES_CLEAN <-readRDS("data/mp03/trees_clean.rds") trees_with_districts <-st_join(TREES_CLEAN, DISTRICTS, join = st_intersects, left =TRUE) } elsestop("Need TREES_CLEAN or trees_with_districts")}# --- Use the tree's own borough attribute (no district-range inference) ------borough_col <-intersect(c("boroname","borough","boro_name","boro"), names(trees_with_districts))stopifnot(length(borough_col) >=1)borough_col <- borough_col[1]trees_boro <- trees_with_districts |>mutate(borough =as.character(.data[[borough_col]]))# --- District summaries for popup --------------------------------------------FT2_PER_SQMI <-5280^2dist_area <-if ("Shape__Area"%in%names(DISTRICTS)) { DISTRICTS |>st_drop_geometry() |>select(CounDist, area_ft2 = Shape__Area)} else { DISTRICTS |>st_transform(2263) |>mutate(area_ft2 =as.numeric(st_area(geometry))) |>st_drop_geometry() |>select(CounDist, area_ft2)}status_col <-intersect(c("status","tpcondition"), names(trees_boro))status_col <-if (length(status_col)) status_col[1] else"status"dist_stats <- trees_boro |>st_drop_geometry() |>mutate(is_dead =tolower(.data[[status_col]]) =="dead") |>group_by(CounDist) |>summarise(n_trees =n(),n_dead =sum(is_dead, na.rm =TRUE),.groups ="drop") |>left_join(dist_area, by ="CounDist") |>mutate(frac_dead = n_dead / n_trees,trees_sqmi = n_trees / (area_ft2 / FT2_PER_SQMI) )# Attach stats back to polygons for popupsdistricts_popup <- DISTRICTS |>left_join(dist_stats, by ="CounDist")# HTML popuppopup_html <-function(cd, n, dead, frac, dens){HTML(sprintf("<b>District %s</b><br/>Trees: %s<br/>Dead: %s (%.1f%%)<br/>Trees / sq mi: %s", cd,format(n, big.mark =","),format(dead, big.mark =","),100*frac,format(round(dens,1), big.mark =",") ))}# --- Points: color by actual borough -----------------------------------------pal <-colorFactor(palette =c("Manhattan"="#1f77b4", "Bronx"="#ff7f0e","Queens"="#2ca02c", "Brooklyn"="#d62728","Staten Island"="#9467bd"),domain =c("Manhattan","Bronx","Queens","Brooklyn","Staten Island"),na.color ="#999999")set.seed(77)trees_sample <- trees_boro |>slice_sample(n =80000) # adjust for speed if needed# --- Map ----------------------------------------------------------------------leaflet(options =leafletOptions(minZoom =9)) |>addProviderTiles(providers$CartoDB.Positron) |># districts with popupsaddPolygons(data =st_simplify(districts_popup, dTolerance =50),color ="grey40", weight =1, fill =FALSE,label =~paste("District", CounDist),popup =~popup_html(CounDist, n_trees, n_dead, frac_dead, trees_sqmi),highlightOptions =highlightOptions(weight =2, color ="#000", bringToFront =TRUE) ) |># trees colored by borough (from the dataset)addCircleMarkers(data = trees_sample,radius =2, stroke =FALSE, fillOpacity =0.6,color =~pal(borough),group ="Trees" ) |>addLegend("bottomright",colors =pal(c("Manhattan","Bronx","Queens","Brooklyn","Staten Island")),labels =c("Manhattan","Bronx","Queens","Brooklyn","Staten Island"),title ="Borough (sampled trees)") |>addLayersControl(overlayGroups =c("Trees"),options =layersControlOptions(collapsed =TRUE) )
Conclusion
Through this analysis, we visualized how trees are distributed across New York City.We have identified key disparities in canopy coverage and tree health. District 16 emerged as a clear candidate for renewed investment. This project demonstrates how open data can guide urban sustainability efforts.
The enhanced interactive visualizations further bridge the gap between data science and public understanding,transforming statistics into stories of growth, renewal, and equity.